import codecs


def start_space(s):
    """Return how many consecutive ' ' characters *s* begins with.

    Only the plain space character is counted; tabs and other whitespace
    end the run.  An empty input yields 0.
    """
    seen = 0
    # Enumerate from 1 so that `seen` always holds the number of characters
    # examined so far, including the current one.
    for seen, char in enumerate(s, 1):
        if char != ' ':
            # The current character is not a space, so it is not counted.
            return seen - 1
    # Input exhausted: every character examined was a space.
    return seen


# Load the base dictionary and the topic list as lists of lines.  Each file
# is opened in a ``with`` block so its handle is closed deterministically
# rather than being left for the garbage collector.
with codecs.open('zhihu/idf.txt.big', 'r', 'utf8') as dict_file:
    dict_data = dict_file.read().split('\n')
with codecs.open('zhihu/topic.txt', 'r', 'utf8') as topic_file:
    zhihu_data = topic_file.read().split('\n')

# NOTE(review): if idf.txt.big ends with a newline, the last element of
# dict_data is '' and the appended entries follow an empty line in the
# output.  Left as-is to keep the output unchanged -- confirm that the
# consumer of idf-zhihu.txt.big tolerates a blank line.
for line in zhihu_data:
    # The number of leading spaces selects the rank; the spaces themselves
    # are stripped from the entry.
    sp = start_space(line)
    line = line[sp:]
    # Skip empty and single-character entries.
    if len(line) < 2:
        continue
    # Skip entries that contain a space.  Output lines have the form
    # "<entry> <rank>", so presumably an embedded space would make the
    # line ambiguous for the reader -- TODO confirm.
    if ' ' in line:
        continue
    # More leading spaces -> higher rank.  Presumably the indentation in
    # topic.txt encodes how specific a topic is -- verify against the
    # tool that generates that file.
    if sp >= 40:
        rank = 25.
    elif sp >= 20:
        rank = 23.
    elif sp >= 8:
        rank = 20.
    else:
        rank = 10.
    dict_data.append(line + ' ' + str(rank))

# Write the merged dictionary.  Closing via ``with`` guarantees the buffered
# output is flushed to disk before the script exits.
with codecs.open('zhihu/idf-zhihu.txt.big', 'w', 'utf8') as out_file:
    out_file.write('\n'.join(dict_data))



